This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.

Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Cmd+Shift+Enter.

options(scipen = 999)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6     ✔ purrr   0.3.4
## ✔ tibble  3.1.7     ✔ dplyr   1.0.9
## ✔ tidyr   1.2.0     ✔ stringr 1.4.0
## ✔ readr   2.1.2     ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(tidymodels)
## ── Attaching packages ────────────────────────────────────── tidymodels 1.0.0 ──
## ✔ broom        1.0.0     ✔ rsample      1.1.0
## ✔ dials        1.0.0     ✔ tune         1.0.1
## ✔ infer        1.0.3     ✔ workflows    1.1.0
## ✔ modeldata    1.0.1     ✔ workflowsets 1.0.0
## ✔ parsnip      1.0.2     ✔ yardstick    1.1.0
## ✔ recipes      1.0.2     
## ── Conflicts ───────────────────────────────────────── tidymodels_conflicts() ──
## ✖ scales::discard() masks purrr::discard()
## ✖ dplyr::filter()   masks stats::filter()
## ✖ recipes::fixed()  masks stringr::fixed()
## ✖ dplyr::lag()      masks stats::lag()
## ✖ yardstick::spec() masks readr::spec()
## ✖ recipes::step()   masks stats::step()
## • Learn how to get started at https://www.tidymodels.org/start/
library(solitude) # -- new package 
library(janitor)
## 
## Attaching package: 'janitor'
## 
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
library(ggpubr)
library(skimr)
library(lubridate)
## 
## Attaching package: 'lubridate'
## 
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(reshape2)
## 
## Attaching package: 'reshape2'
## 
## The following object is masked from 'package:tidyr':
## 
##     smiths
library(vip)
## 
## Attaching package: 'vip'
## 
## The following object is masked from 'package:utils':
## 
##     vi
library(NeuralNetTools)
library(DALEX)    # new 
## Welcome to DALEX (version: 2.4.2).
## Find examples and detailed introduction at: http://ema.drwhy.ai/
## 
## 
## Attaching package: 'DALEX'
## 
## The following object is masked from 'package:dplyr':
## 
##     explain
library(DALEXtra) # new 
labels<-read_csv("smilegate_1M_labels.csv",na=c("null","nan","","NA","n/a")) %>% clean_names()
## Rows: 1000000 Columns: 2
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): transaction_id, EVENT_LABEL
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
skim(labels)
Data summary
Name labels
Number of rows 1000000
Number of columns 2
_______________________
Column type frequency:
character 2
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
transaction_id 0 1 12 18 0 1000000 0
event_label 0 1 5 5 0 2 0
transaction<-read_csv("smilegate_1M_transactions.csv",na=c("null","nan","","NA","n/a")) %>% clean_names()
## Rows: 1000000 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (3): ip_address, email_address, transaction_id
## dbl  (9): registration_deposit, mean_deposit, mean_txn, monetary_returns_5da...
## dttm (1): EVENT_TIMESTAMP
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
skim(transaction)
Data summary
Name transaction
Number of rows 1000000
Number of columns 13
_______________________
Column type frequency:
character 3
numeric 9
POSIXct 1
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
ip_address 0 1 9 15 0 3531 0
email_address 0 1 15 32 0 3913 0
transaction_id 0 1 12 18 0 1000000 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
registration_deposit 0 1 4818.73 1322.51 10 3790 4785 5840 9999 ▁▅▇▃▁
mean_deposit 0 1 499.24 124.59 10 409 502 591 999 ▁▃▇▃▁
mean_txn 0 1 511.51 95.11 10 448 512 575 999 ▁▂▇▂▁
monetary_returns_5day 0 1 109.92 26.32 0 91 113 130 199 ▁▂▇▇▁
monetary_returns_15day 0 1 101.47 24.78 0 83 101 120 199 ▁▃▇▃▁
monetary_returns_30day 0 1 87.09 18.31 0 75 87 99 199 ▁▅▇▁▁
game_cash_count_3day 0 1 49.57 12.79 1 40 49 59 99 ▁▅▇▃▁
distinct_account_3day 0 1 50.16 8.13 1 45 50 55 99 ▁▁▇▁▁
account_id 0 1 54976.41 25979.38 10000 32488 54938 77502 99999 ▇▇▇▇▇

Variable type: POSIXct

skim_variable n_missing complete_rate min max median n_unique
event_timestamp 0 1 2021-12-03 02:40:53 2022-12-03 08:25:20 2022-06-03 19:56:53 984283
head(transaction)
set.seed(12)

dtsplit<- initial_split(transaction,prop = 0.7)
isotrain<- training(dtsplit)
isotest<- testing(dtsplit)
isotrain1<-isotrain %>% dplyr::select_if(is.numeric)
isotest1<- isotest %>% dplyr::select_if(is.numeric)
so_recipe <- recipe( ~  registration_deposit+mean_deposit+mean_txn+monetary_returns_5day+
                       monetary_returns_15day+monetary_returns_30day+game_cash_count_3day+distinct_account_3day, isotrain1) %>% 
  step_impute_median(all_numeric_predictors()) %>%
#  step_normalize(all_numeric_predictors())  %>%
  prep()


bake_iso<-bake(so_recipe,isotrain1)
iso_forest <- isolationForest$new(
  sample_size = 2048,
  num_trees = 100,
  max_depth = 12)


iso_forest$fit(bake_iso)
## INFO  [17:15:57.214] Building Isolation Forest ...
## INFO  [17:16:05.272] done
## INFO  [17:16:05.279] Computing depth of terminal nodes ...
## INFO  [17:16:05.488] done
## INFO  [17:16:17.494] Completed growing isolation forest
pred_train <- iso_forest$predict(bake_iso)

pred_train %>%
  ggplot(aes(average_depth)) +
  geom_histogram(bins=20) + 
  geom_vline(xintercept = 9.9, linetype="dotted", 
                color = "blue", size=1.5) + 
  labs(title="Isolation Forest Average Tree Depth")

pred_train %>%
  ggplot(aes(anomaly_score)) +
  geom_histogram(bins=20) + 
  geom_vline(xintercept = 0.62, linetype="dotted", 
                color = "blue", size=1.5) + 
  labs(title="Isolation Forest Anomaly Score Above 0.62")

bake_test<-bake(so_recipe,isotest1)
pred_test <- iso_forest$predict(bake_test)

pred_test %>%
  ggplot(aes(average_depth)) +
  geom_histogram(bins=20) + 
  geom_vline(xintercept = 9.9, linetype="dotted", 
                color = "blue", size=1.5) + 
  labs(title="Isolation Forest Average Tree Depth")

pred_test %>%
  ggplot(aes(anomaly_score)) +
  geom_histogram(bins=20) + 
  geom_vline(xintercept = 0.62, linetype="dotted", 
                color = "blue", size=1.5) + 
  labs(title="Isolation Forest Anomaly Score Above 0.62")

train_pred <- bind_cols(pred_train,bake_iso) %>%
  mutate(anomaly = as.factor(if_else(average_depth <=9.9, "Anomaly","Normal")))

train_pred %>%
  arrange(average_depth) %>%
  count(anomaly)
train_pred_score <- bind_cols(pred_train,bake_iso) %>%
  mutate(anomaly = as.factor(if_else(anomaly_score >=0.62, "Anomaly","Normal")))

train_pred_score %>%
  arrange(anomaly_score) %>%
  count(anomaly)
synth_train <- bind_cols(pred_train, isotrain) %>%
  mutate(synthetic_target = as.factor(
      if_else(average_depth <= 9.9,"fraud","legit")),
         synthetic_target2 = as.factor(
           if_else(anomaly_score >= 0.62,"fraud","legit"))
         )
synth_train%>%
count(synthetic_target,synthetic_target2)
synth_test <- bind_cols(pred_test, isotest) %>%
  mutate(synthetic_target = as.factor(
      if_else(average_depth <= 9.9,"fraud","legit")),
         synthetic_target2 = as.factor(
           if_else(anomaly_score >= 0.62,"fraud","legit"))
         )
synth_test%>%
count(synthetic_target,synthetic_target2)
train_w_label<-synth_train %>%
  inner_join(labels,by="transaction_id")%>%
  mutate(event_label=factor(event_label))

test_w_label<-synth_test %>%
  inner_join(labels,by="transaction_id")%>%
  mutate(event_label=factor(event_label))
# precision,recall
train_w_label %>%
  yardstick::precision(event_label, synthetic_target)%>%
mutate(part="train")%>%
  bind_rows(train_w_label %>%
    yardstick::recall(event_label, synthetic_target)%>%
    mutate(part="train"))%>%
  bind_rows(test_w_label %>%
  yardstick::precision(event_label, synthetic_target)%>%
    mutate(part="test")%>%
  bind_rows(test_w_label %>%
  yardstick::recall(event_label, synthetic_target)%>%mutate(part="test")))
# precision,recall
train_w_label %>%
  yardstick::precision(event_label, synthetic_target2)%>%
mutate(part="train")%>%
  bind_rows(train_w_label %>%
    yardstick::recall(event_label, synthetic_target2)%>%
    mutate(part="train"))%>%
  bind_rows(test_w_label %>%
  yardstick::precision(event_label, synthetic_target2)%>%
    mutate(part="test")%>%
  bind_rows(test_w_label %>%
  yardstick::recall(event_label, synthetic_target2)%>%mutate(part="test")))
train_w_label$synthetic_target2<-as.factor(train_w_label$synthetic_target2)
model_recipe <- recipe(synthetic_target2 ~  registration_deposit+mean_deposit+mean_txn+monetary_returns_5day+
                       monetary_returns_15day+monetary_returns_30day+game_cash_count_3day+distinct_account_3day,data = train_w_label) %>% 
prep()


bake_train<-bake(model_recipe, train_w_label)
skim(bake_train)
Data summary
Name bake_train
Number of rows 700000
Number of columns 9
_______________________
Column type frequency:
factor 1
numeric 8
________________________
Group variables None

Variable type: factor

skim_variable n_missing complete_rate ordered n_unique top_counts
synthetic_target2 0 1 FALSE 2 leg: 695605, fra: 4395

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
registration_deposit 0 1 4819.74 1321.90 10 3791 4787 5841 9999 ▁▅▇▃▁
mean_deposit 0 1 499.17 124.59 10 409 502 591 999 ▁▃▇▃▁
mean_txn 0 1 511.65 95.07 10 448 512 576 999 ▁▂▇▂▁
monetary_returns_5day 0 1 109.92 26.32 0 91 113 130 199 ▁▂▇▇▁
monetary_returns_15day 0 1 101.45 24.78 0 83 101 120 196 ▁▃▇▅▁
monetary_returns_30day 0 1 87.12 18.30 0 75 87 99 199 ▁▅▇▁▁
game_cash_count_3day 0 1 49.57 12.79 1 40 49 59 99 ▁▅▇▃▁
distinct_account_3day 0 1 50.17 8.13 1 45 50 55 99 ▁▁▇▁▁
rf_model <- rand_forest(trees = 100, mtry=13,min_n = 10) %>%
   set_mode("classification") %>%
   set_engine("ranger", importance="permutation")
#num.threads = 8 , max.depth = 10
rf_workflow <- workflow() %>%
  add_recipe(model_recipe) %>%
  add_model(rf_model) %>%
    fit(train_w_label)
## Warning: 13 columns were requested but there were 8 predictors in the data. 8
## will be used.
rf_workflow
## ══ Workflow [trained] ══════════════════════════════════════════════════════════
## Preprocessor: Recipe
## Model: rand_forest()
## 
## ── Preprocessor ────────────────────────────────────────────────────────────────
## 0 Recipe Steps
## 
## ── Model ───────────────────────────────────────────────────────────────────────
## Ranger result
## 
## Call:
##  ranger::ranger(x = maybe_data_frame(x), y = y, mtry = min_cols(~13,      x), num.trees = ~100, min.node.size = min_rows(~10, x), importance = ~"permutation",      num.threads = 1, verbose = FALSE, seed = sample.int(10^5,          1), probability = TRUE) 
## 
## Type:                             Probability estimation 
## Number of trees:                  100 
## Sample size:                      700000 
## Number of independent variables:  8 
## Mtry:                             8 
## Target node size:                 10 
## Variable importance mode:         permutation 
## Splitrule:                        gini 
## OOB prediction error (Brier s.):  0.001947785
options(yardstick.event_first = TRUE)
# score training
predict(rf_workflow, train_w_label, type = "prob") %>%
  bind_cols(predict(rf_workflow, train_w_label, type = "class")) %>%
  mutate(part = "train_w_label") %>%
  bind_cols(., train_w_label) -> scored_train

predict(rf_workflow, test_w_label, type = "prob") %>%
  bind_cols(predict(rf_workflow,  test_w_label, type = "class")) %>%
  mutate(part = "test_w_label") %>%
  bind_cols(., test_w_label) -> scored_test
# precision,recall
scored_train %>%
  yardstick::precision(event_label, .pred_class)%>%
mutate(part="train")%>%
    bind_rows(scored_test %>%
  yardstick::precision(event_label, .pred_class)%>%
    mutate(part="test")%>%
  bind_rows(scored_train %>%
    yardstick::recall(event_label, .pred_class)%>%
    mutate(part="train"))%>%
  bind_rows(scored_test %>%
  yardstick::recall(event_label, .pred_class)%>%mutate(part="test")))
## Warning: The `yardstick.event_first` option has been deprecated as of yardstick 0.0.7 and will be completely ignored in a future version.
## Instead, set the following argument directly in the metric function:
## `options(yardstick.event_first = TRUE)`  -> `event_level = 'first'` (the default)
## `options(yardstick.event_first = FALSE)` -> `event_level = 'second'`
## This warning is displayed once per session.
## Metrics (AUC / Accuracy / Log Loss)
bind_rows (scored_train, scored_test)  %>%
  group_by(part) %>%
  metrics(event_label, .pred_fraud, estimate = .pred_class) %>%
  filter(.metric %in% c('accuracy', 'roc_auc', 'mn_log_loss')) %>%
  pivot_wider(names_from = .metric, values_from = .estimate)
model_recipe_original <- recipe(event_label ~   registration_deposit+mean_deposit+mean_txn+monetary_returns_5day+
                       monetary_returns_15day+monetary_returns_30day+game_cash_count_3day+distinct_account_3day,data = train_w_label) %>% 
prep()


bake_train<-bake(model_recipe_original, train_w_label)
skim(bake_train)
Data summary
Name bake_train
Number of rows 700000
Number of columns 9
_______________________
Column type frequency:
factor 1
numeric 8
________________________
Group variables None

Variable type: factor

skim_variable n_missing complete_rate ordered n_unique top_counts
event_label 0 1 FALSE 2 leg: 689649, fra: 10351

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
registration_deposit 0 1 4819.74 1321.90 10 3791 4787 5841 9999 ▁▅▇▃▁
mean_deposit 0 1 499.17 124.59 10 409 502 591 999 ▁▃▇▃▁
mean_txn 0 1 511.65 95.07 10 448 512 576 999 ▁▂▇▂▁
monetary_returns_5day 0 1 109.92 26.32 0 91 113 130 199 ▁▂▇▇▁
monetary_returns_15day 0 1 101.45 24.78 0 83 101 120 196 ▁▃▇▅▁
monetary_returns_30day 0 1 87.12 18.30 0 75 87 99 199 ▁▅▇▁▁
game_cash_count_3day 0 1 49.57 12.79 1 40 49 59 99 ▁▅▇▃▁
distinct_account_3day 0 1 50.17 8.13 1 45 50 55 99 ▁▁▇▁▁
#num.threads = 8 , max.depth = 10
rf_workflow_original <- workflow() %>%
  add_recipe(model_recipe_original) %>%
  add_model(rf_model) %>%
    fit(train_w_label)
## Warning: 13 columns were requested but there were 8 predictors in the data. 8
## will be used.
rf_workflow_original
## ══ Workflow [trained] ══════════════════════════════════════════════════════════
## Preprocessor: Recipe
## Model: rand_forest()
## 
## ── Preprocessor ────────────────────────────────────────────────────────────────
## 0 Recipe Steps
## 
## ── Model ───────────────────────────────────────────────────────────────────────
## Ranger result
## 
## Call:
##  ranger::ranger(x = maybe_data_frame(x), y = y, mtry = min_cols(~13,      x), num.trees = ~100, min.node.size = min_rows(~10, x), importance = ~"permutation",      num.threads = 1, verbose = FALSE, seed = sample.int(10^5,          1), probability = TRUE) 
## 
## Type:                             Probability estimation 
## Number of trees:                  100 
## Sample size:                      700000 
## Number of independent variables:  8 
## Mtry:                             8 
## Target node size:                 10 
## Variable importance mode:         permutation 
## Splitrule:                        gini 
## OOB prediction error (Brier s.):  0.006761302
options(yardstick.event_first = TRUE)
# score training
predict(rf_workflow_original, train_w_label, type = "prob") %>%
  bind_cols(predict(rf_workflow_original, train_w_label, type = "class")) %>%
  mutate(part = "train") %>%
  bind_cols(., train_w_label) -> scored_train_original

predict(rf_workflow_original, test_w_label, type = "prob") %>%
  bind_cols(predict(rf_workflow_original,  test_w_label, type = "class")) %>%
  mutate(part = "test") %>%
  bind_cols(., test_w_label) -> scored_test_original
# precision,recall
scored_train_original %>%
  yardstick::precision(event_label, .pred_class)%>%
mutate(part="train")%>%
  bind_rows(scored_test_original %>%
  yardstick::precision(event_label, .pred_class)%>%
    mutate(part="test")%>%
  bind_rows(scored_train_original %>%
    yardstick::recall(event_label, .pred_class)%>%
    mutate(part="train"))%>%
  bind_rows(scored_test_original %>%
  yardstick::recall(event_label, .pred_class)%>%mutate(part="test")))
## Metrics (AUC / Accuracy / Log Loss)
bind_rows (scored_train_original, scored_test_original)  %>%
  group_by(part) %>%
  metrics(event_label, .pred_fraud, estimate = .pred_class) %>%
  filter(.metric %in% c('accuracy', 'roc_auc', 'mn_log_loss')) %>%
  pivot_wider(names_from = .metric, values_from = .estimate)
train_w_label_sample <- train_w_label %>% sample_n(1000)




rf_explainer_surrogate <- 
  explain_tidymodels(
    rf_workflow,   # fitted workflow object 
    data = train_w_label_sample,    # original training data
    y = train_w_label_sample$event_label, # predicted outcome 
    label = "rf_explainer_surrogate",
    verbose = FALSE
  )
## Warning in Ops.factor(y, predict_function(model, data)): '-' not meaningful for
## factors
explain_prediction <- function(single_record){
 
# step 3. run the explainer 
rf_breakdown_surrogate <- predict_parts(explainer = rf_explainer_surrogate, 
                               new_observation = single_record,
                               type="break_down"
                               )

# step 4. plot it. 
# you notice you don't get categorical values ...  
rf_breakdown_surrogate %>% plot()%>%print()

# --- more involved explanations with categories. ---- 

# step 4a.. convert breakdown to a tibble so we can join it
rf_breakdown_surrogate %>%
  as_tibble() -> breakdown_data_surrogate 

# step 4b. transpose your single record prediction 
single_record %>% 
 gather(key="variable_name",value="value") -> prediction_data_surrogate 

# step 4c. get a predicted probability for plot 
prediction_prob_surrogate <- single_record[,".pred_fraud"] %>% pull()

# step 5. plot it.
print(breakdown_data_surrogate %>% 
  inner_join(prediction_data_surrogate) %>%
  mutate(contribution = round(contribution,3),) %>%
  filter(variable_name != "intercept") %>%
  mutate(variable = paste(variable_name,value,sep = ": ")) %>% 
  ggplot(aes(y=reorder(variable, contribution), x= contribution, fill=sign)) +
  geom_col() + 
  geom_text(aes(label=contribution), 
          size=4,
            position=position_dodge(width=0.7),
            vjust=0.5,
            )+
  labs(
    title = "DALEX explainations",
    subtitle = paste("predicted:",as.character(round(prediction_prob_surrogate,3))),
                    x="contribution",
                    y="features")
)
}

top_10_surrogate <- scored_test %>%
  filter(.pred_class == event_label) %>%
  filter(event_label == "fraud")%>%
  slice_max(.pred_fraud,n=10)%>%
  head(10)


for (row in 1:nrow(top_10_surrogate)) {
    s_record_surrogate <- top_10_surrogate[row,]
    explain_prediction(s_record_surrogate)
} 
## Warning: attributes are not identical across measure variables;
## they will be dropped
## Joining, by = "variable_name"

## Warning: attributes are not identical across measure variables;
## they will be dropped
## Joining, by = "variable_name"

## Warning: attributes are not identical across measure variables;
## they will be dropped
## Joining, by = "variable_name"

## Warning: attributes are not identical across measure variables;
## they will be dropped
## Joining, by = "variable_name"

## Warning: attributes are not identical across measure variables;
## they will be dropped
## Joining, by = "variable_name"

## Warning: attributes are not identical across measure variables;
## they will be dropped
## Joining, by = "variable_name"

## Warning: attributes are not identical across measure variables;
## they will be dropped
## Joining, by = "variable_name"

## Warning: attributes are not identical across measure variables;
## they will be dropped
## Joining, by = "variable_name"

## Warning: attributes are not identical across measure variables;
## they will be dropped
## Joining, by = "variable_name"

## Warning: attributes are not identical across measure variables;
## they will be dropped
## Joining, by = "variable_name"

rf_explainer_original <- 
  explain_tidymodels(
    rf_workflow_original,   # fitted workflow object 
    data = train_w_label_sample,    # original training data
    y = train_w_label_sample$event_label, # predicted outcome 
    label = "rf_explainer_original",
    verbose = FALSE
  )
## Warning in Ops.factor(y, predict_function(model, data)): '-' not meaningful for
## factors
explain_prediction2 <- function(single_record){
# step 3. run the explainer 
rf_breakdown_original <- predict_parts(explainer = rf_explainer_original, 
                               new_observation = single_record,
                               type="break_down"
                               )

# step 4. plot it. 
# you notice you don't get categorical values ...  
rf_breakdown_original %>% plot()%>%print()

# --- more involved explanations with categories. ---- 

# step 4a.. convert breakdown to a tibble so we can join it
rf_breakdown_original %>%
  as_tibble() -> breakdown_data_origianl 

# step 4b. transpose your single record prediction 
single_record %>% 
 gather(key="variable_name",value="value") -> prediction_data_original

# step 4c. get a predicted probability for plot 
prediction_prob_original <- single_record[,".pred_fraud"] %>% pull()

# step 5. plot it.
print(breakdown_data_origianl %>% 
  inner_join(prediction_data_original) %>%
  mutate(contribution = round(contribution,3),) %>%
  filter(variable_name != "intercept") %>%
  mutate(variable = paste(variable_name,value,sep = ": ")) %>% 
  ggplot(aes(y=reorder(variable, contribution), x= contribution, fill=sign)) +
  geom_col() + 
  geom_text(aes(label=contribution), 
          size=4,
            position=position_dodge(width=0.7),
            vjust=0.5,
            )+
  labs(
    title = "DALEX explainations",
    subtitle = paste("predicted:",as.character(round(prediction_prob_original,3))),
                    x="contribution",
                    y="features")
)
  }

top_10_original <- scored_test_original %>%
  filter(.pred_class == event_label) %>%
  filter(event_label=="fraud")%>%
  slice_max(.pred_fraud,n=10)%>%
  head(10)


for (row in 1:nrow(top_10_original)) {
    s_record_original <- top_10_original[row,]
  explain_prediction2(s_record_original)
} 
## Warning: attributes are not identical across measure variables;
## they will be dropped
## Joining, by = "variable_name"

## Warning: attributes are not identical across measure variables;
## they will be dropped
## Joining, by = "variable_name"

## Warning: attributes are not identical across measure variables;
## they will be dropped
## Joining, by = "variable_name"

## Warning: attributes are not identical across measure variables;
## they will be dropped
## Joining, by = "variable_name"

## Warning: attributes are not identical across measure variables;
## they will be dropped
## Joining, by = "variable_name"

## Warning: attributes are not identical across measure variables;
## they will be dropped
## Joining, by = "variable_name"

## Warning: attributes are not identical across measure variables;
## they will be dropped
## Joining, by = "variable_name"

## Warning: attributes are not identical across measure variables;
## they will be dropped
## Joining, by = "variable_name"

## Warning: attributes are not identical across measure variables;
## they will be dropped
## Joining, by = "variable_name"

## Warning: attributes are not identical across measure variables;
## they will be dropped
## Joining, by = "variable_name"

Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Cmd+Option+I.

When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Cmd+Shift+K to preview the HTML file).

The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike Knit, Preview does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed.